import os
os.chdir(os.curdir) # change to the current directory (os.curdir is '.', so the working directory is unchanged)
import matplotlib.pyplot as plt
import numpy as np
from  episongreddy import episongreddy

# Bandit experiment: run `n` independent tasks, each for `steps` steps, with
# four differently configured `episongreddy` agents that share the same reward
# vector `q`, then plot the per-step reward of each agent averaged over tasks.

n = 2000  # number of independent tasks (runs) to average over
steps = 1000  # steps per task; also the length of every reward curve
arms = 10  # number of reward values drawn per task

# Per-step reward totals, summed over all tasks (divided by `n` when plotted).
greed_reward = np.zeros(steps)
ep1_reward = np.zeros(steps)
ep2_reward = np.zeros(steps)
soft_reward = np.zeros(steps)

for _ in range(n):
    # Fresh reward values for this task, drawn from a standard normal.
    q = np.random.normal(0, 1, arms)

    # NOTE(review): the method string is spelled 'grddy' for the first and
    # third agents but 'greedy' for the second. `calc_rewards` is not visible
    # here, so the strings are left untouched -- confirm which spelling the
    # class actually expects and make the three calls consistent.
    # The first constructor argument appears to be epsilon (it matches the
    # plot labels below) -- TODO confirm against the episongreddy class.
    greed = episongreddy(0, q)
    greed_reward += greed.calc_rewards(steps, 'grddy')

    ep1 = episongreddy(0.01, q)
    ep1_reward += ep1.calc_rewards(steps, 'greedy')

    ep2 = episongreddy(0.1, q)
    ep2_reward += ep2.calc_rewards(steps, 'grddy')

    # NOTE(review): how the 0.1 argument is interpreted in 'softmax' mode is
    # not visible from this file -- verify.
    soft = episongreddy(0.1, q)
    soft_reward += soft.calc_rewards(steps, 'softmax')

# Average over tasks and draw one curve per agent.
plt.plot(greed_reward / n, c='r', alpha=1, label=r'$\epsilon = 0$')
plt.plot(ep1_reward / n, c='g', alpha=1, label=r'$\epsilon = 0.01$')
plt.plot(ep2_reward / n, c='b', alpha=1, label=r'$\epsilon=0.1$')
plt.plot(soft_reward / n, c='y', alpha=1, label=r'softmax')

plt.xlabel('Step')
plt.ylabel('Average Rewards')
plt.legend(loc=4)  # location code 4 is 'lower right'
plt.xlim(-25, steps)  # small left margin so the first steps are not on the axis

plt.show()
